/*
Code up TX 1999-2003

Notes 
1. Due suppression of admission data and DOB cant compute exact age for aids admissions - 
   There are only about 250 of these on average among 21-22 year olds per file year except for 2008 when it is several time this
2. There are no separate ecodes prior to 2003 in 2002 and before they are at the end of the ICD-9 list
3. Gender is only missing for 103 observations 
*/

options compress = yes obs = max;

libname tx "C:\DATA OFFLINE\Hospital Texas 01";
libname inter "C:\Data\Inter";

/*How many dates are missing out of about 2.7 million
Year	 	admit_start_of_care 	birth_date
1999 			9918 					9931
2000  			9524   					9542
2001 			9211 					9217
2002			0						6
2003			0						9;
proc freq data = TX.Texas99; tables admit_start_of_care birth_date; run;
proc freq data = TX.Texas00; tables admit_start_of_care birth_date; run;
proc freq data = TX.Texas01; tables admit_start_of_care birth_date; run;
proc freq data = TX.Texas02; tables admit_start_of_care birth_date; run;
proc freq data = TX.Texas03; tables admit_start_of_care birth_date; run;
*/
proc freq data = texas00;
  tables OTH_DIAG_CODE_1;
run;

*Appears to be a missing decimal in the 1999 and 2000 charges files;
proc means data = TX.Texas00 min p1 p10 p90 p95 p99 max mean;
   var TOTAL_CHARGES_23;
run;

proc means data = TX.Texas01 min p1 p10 p90 p95 p99 max mean;
   var TOTAL_CHARGES_23;
run;

proc means data = TX.Texas03 min p1 p10 p90 p95 p99 max mean;
   var TOTAL_CHARGES_23;
run;

%macro pull_TX(file);
data &file. ;
   set TX.&file. (keep = pat_status pat_age admit_start_of_care birth_date ADMITTING_DIAG PRINC_DIAG_CODE 
                         EXTNAL_CAUSE_OF_INJURY SEX_CODE source_of_admission OTH_DIAG_CODE_1-OTH_DIAG_CODE_8
                         TOTAL_CHARGES_23 SOURCE_PAYMENT_CODE_1);
   format ad_date date.;
  *Record the file name and state postal code; 
   file = "&file.";
   state = 'TX';
  *Compute age in months relative to age 21 - the intck function counts the number of starting 
   points of the interval crossed. So an ED visit occurring in the birth month will be 
   have a months_21 value of 0;
  *20000315;
   byear = floor(birth_date/10000); 
   bmonth = floor((birth_date - byear*10000)/100);
   bday = birth_date - 10000*byear -100*bmonth;
   ayear = floor(admit_start_of_care/10000); 
   amonth = floor((admit_start_of_care - 10000*ayear)/100);
   aday = admit_start_of_care - 10000*ayear -100*amonth;
   months_21 = intck('month',mdy(bmonth,14,byear+21),mdy(amonth,15,ayear));
   ad_date = mdy(amonth,15,ayear);
   days_21 = intck('day',mdy(bmonth,bday,byear+21),mdy(amonth,aday,ayear));
   if months_21 = . then bad_date = 1; else bad_date = 0;
   state = 'TX';
   if "&file." = "Texas99" then file_year = 1999;
   if "&file." = "Texas00" then file_year = 2000;
   if "&file." = "Texas01" then file_year = 2001;
   if "&file." = "Texas02" then file_year = 2002;
   if "&file." = "Texas03" then file_year = 2003;

   if source_of_admission = "7" then from_ED = 1; else from_ED = 0;
  *Code up discharge;
   if pat_status = 20 then died = 1; else died = 0;
  *Flag for pregnant;
   preg_related = 0;
   if substr(PRINC_DIAG_CODE,1,2) in ('63','64','65','66','67') then preg_related = 1; 
   if substr(ADMITTING_DIAG,1,2) in ('63','64','65','66','67') then preg_related = 1; 
  *Restrict based on the age in months variable;
   if SEX_CODE = 'M' then female = 0; 
   if SEX_CODE = 'F' then female = 1;
   count = 1;

run;
%mend;
%pull_TX(Texas99);
%pull_TX(Texas00);
%pull_TX(Texas01);
%pull_TX(Texas02);
%pull_TX(Texas03);


data TX_combined;
   set Texas99 Texas00 Texas01 Texas02 Texas03 ;
   *Drop bad dates there are 28722 obs dropped of 13311974 records (0.22 percent);
	if bad_date = 1 then delete;
  *Restricting age range shrinks the sample to  975922;
   if months_21 < -40 or months_21 > 39 then delete;
   *Remove inpatient stays that started before hte time period we are examining leaves 974404 obs;
	if state = 'TX' and ad_date < mdy(1,1,1999) then delete;

  *Extract the ecodes;
   array icd{10} PRINC_DIAG_CODE ADMITTING_DIAG OTH_DIAG_CODE_1-OTH_DIAG_CODE_8;
   array e_dx{10} $5. e_dx1-e_dx10;
   array ec{11} $5. ECODE1-ECODE11;
   i = 1;
   j = 1;
   do until (i > 10);
      if substr(icd{i},1,1) = 'E' then do;
         e_dx{j} = icd{i};
		 j = j + 1;
      end;
      i+ +1; *Increment loop;
   end;
  *Need to do this in two steps otherwise it retains the E code across rows - not sure why it is retaining it;
   do i = 1 to 10;
       ec{i+1} = e_dx{i};
   end;
  *Put this on the first element in the array;
   ec{1}  = EXTNAL_CAUSE_OF_INJURY;
  *Code up alcohol mentions;
   array np_icd{8} OTH_DIAG_CODE_1-OTH_DIAG_CODE_8;
  *Code up alcohol mention;
   alcohol_mention = 0;
   alcohol_mention_oth = 0;
   i = 1; *Start with second ICD code;
   do until (i > 8);
      if substr(np_icd{i},1,3) in ('291','303') or substr(np_icd{i},1,4) = '3050' then alcohol_mention = 1; 
	 *These are other minor mentions of alcohol;
      if substr(np_icd{i},1,4) in ("3575", "4255", "5353", "5710", "5711", "5712", "5713", "7903", "9800", "V113", "V791" )  then alcohol_mention_oth = 1; 
		 i+ +1;
   end;  
  *Code up alcohol primary;
	alcohol = 0;
    if substr(ADMITTING_DIAG,1,3) in ('291','303') or substr(ADMITTING_DIAG,1,4) = '3050' then alcohol = 1; 
    if substr(PRINC_DIAG_CODE,1,3) in ('291','303') or substr(PRINC_DIAG_CODE,1,4) = '3050' then alcohol = 1; 
   *Overall injury category for regressions - need both as can have no E1 but have an E2 due to draw from ICD;
	if ECODE1 ne '' or ECODE2 ne '' then injury = 1; else injury = 0;
   *Source and location of injury;
   inj_by_self_e = 0;
   inj_by_oth_e = 0;
   place = "       ";
  *Code up alcohol or drugs any mention;
   i = 1;
   do until (i > 11);
     *There are very few people that are coded as both (175) and going over all the ecodes only adds a few percent to the number coded; 
      if substr(ec{i},2,2) = '95' then inj_by_self_e = 1; 
      if substr(ec{i},2,2) = '96' then inj_by_oth_e = 1;  
	 *Place often coded for assaults;
      if substr(ec{i},2,3) = '849' then place = ec{i};
      if substr(ec{i},2,4) in ('8600','8601') then alcohol_mention_oth = 1; 
		 i+ +1;
   end;
  *Create mutually exclusive categories precedence - alcohol,;
    if alcohol = 0 and inj_by_oth_e = 1 then inj_by_oth  = 1; else inj_by_oth = 0;
    if alcohol = 0 and inj_by_oth_e = 0 and  inj_by_self_e = 1 then inj_by_self  = 1; else inj_by_self = 0;
    if alcohol = 0 and inj_by_oth_e = 0 and  inj_by_self_e = 0 and  injury = 1 then inj_accident  = 1; else inj_accident = 0;
    if alcohol = 0 and inj_by_oth_e = 0 and  inj_by_self_e = 0 and  injury = 0 and alcohol_mention = 1 then alcohol_on_dx = 1; else alcohol_on_dx = 0; *Alcohol not marked as primary cause but mentioned;
    if alcohol = 0 and inj_by_oth_e = 0 and  inj_by_self_e = 0 and  injury = 0 and alcohol_mention = 0 then illness = 1; else illness = 0; *Residual category;
  *Create combined categories;
   alcohol_any = alcohol + alcohol_on_dx;
   injury_or_alc =  inj_by_oth + inj_by_self + inj_accident + alcohol + alcohol_on_dx ;
  *Make sure the list is exhaustive;
	visit = alcohol + inj_by_self + inj_by_oth + inj_accident + alcohol_on_dx + illness;
  *Flag for insurance;
   if SOURCE_PAYMENT_CODE_1 = 'D'	then Medicaid = 1; else Medicaid = 0;
   if SOURCE_PAYMENT_CODE_1 in ("F","G")	then Private = 1; else Private = 0;
   if SOURCE_PAYMENT_CODE_1 = "A"	then Self_pay = 1; else Self_pay = 0;
   if SOURCE_PAYMENT_CODE_1 not in ("A","D","F","G") then other_ins = 1; else other_ins = 0;
  *In 1999 and 2000 files these include cents with no decimal;
   if file in ("Texas99","Texas00") then TOTCHG = TOTAL_CHARGES_23/100;
   if file = "Texas01" then TOTCHG = .; *This year is a blend of decimal and no decimal;
   if file in ("Texas02","Texas03") then TOTCHG = TOTAL_CHARGES_23;
  *Set the two negative charges to 0;
   if TOTCHG < 0 then TOTCHG = .;
   if TOTCHG = . then miss_chg = 1; else miss_chg = 0; 
   if female = . then miss_female = 1; else miss_female = 0; 
run;

proc means data = TX_combined ;
   var  visit alcohol_any injury_or_alc inj_by_self  inj_by_oth inj_accident illness died medicaid private self_pay other_ins  preg_related   female miss_chg miss_female alcohol_mention;
run;

proc means data = TX_combined noprint;
   var visit alcohol_any injury_or_alc inj_by_self  inj_by_oth inj_accident illness died medicaid private self_pay other_ins  preg_related   female miss_chg miss_female alcohol_mention;
   class state ad_date;
   output out = stability_TX mean = visit alcohol_any injury_or_alc inj_by_self  inj_by_oth inj_accident illness died medicaid private self_pay other_ins  preg_related   female miss_chg miss_female alcohol_mention;
run;

proc export data=stability_TX outfile= "E:\temp\P07 Check stability TX.dta" replace;
run;

libname ext "E:\temp";


proc means data = TX_combined min p1 p10 p90 p95 p99 max mean;
   var TOTAL_CHARGES_23 TOTCHG;
   class file_year;
run;

proc freq data = TX_combined;
   tables  TOTCHG;
   where TOTCHG < 0;
run;

data ext.Texas_inp;
   set TX_combined;
run;
